* Do-file for merging number of cases to COVID stringency index 
* Author: Nursena Aksunger
* Date: 01.10.2021




*-------------------------------------------------------------------------------
*			0. Clean Data
*-------------------------------------------------------------------------------

*			0.1		Basic Data Prep
*-------------------------------------------------------------------------------
* Run the shared data-preparation do-file without echoing its output
quietly do "${code}/Data Preparation/dataprep"

* Output folder for the stringency results
global out "${output}/forPaper/stringency"

* Drop any coefsv program held in memory; capture suppresses the error raised
* when no such program is defined
capture program drop coefsv
*qui do "${dropbox}/99.Common/coefsv.do"

* Graph options stored once in a global for reuse
global grset graphregion(color(white)) ylabel(, angle(0)) plotregion(margin(sides))

* Store the data currently in memory so it can be merged back in later
tempfile main
save `main'

*			0.2		Clean Stringency
*-------------------------------------------------------------------------------
* Read the stringency spells with every column imported as a string.
* The path uses forward slashes, like the other paths in this file, so that it
* resolves on Windows, macOS and Linux.
import excel using "${raw}/0.NonSurveyData/stringency.xlsx", sheet("Sheet1") firstrow clear allstring
destring stringency_index, replace

* Spell boundaries: parse as year-month-day first, then fall back to
* day-month-year for values the first pattern could not parse
gen start = date(start_date, "YMD")
replace start = date(start_date, "DMY") if mi(start)

gen end = date(end_date, "YMD")
replace end = date(end_date, "DMY") if mi(end)

* Number the spells within country by start date; change is the difference in
* the index from the previous spell (missing for the first spell of a country)
bysort country (start): gen policy = _n
bysort country (start): gen change = stringency_index - stringency_index[_n - 1]

* Expand each spell into one row per calendar day from start to end inclusive
gen duration = end - start + 1
expand duration
gen date = start
bysort country start (end): replace date = date + _n - 1

* Day counter within each spell (1 on the first day)
bysort country policy (date): gen days_post = _n

format date %td
format start %td
format end %td


* Stop here unless country-date uniquely identifies the rows
isid date country
tempfile strind
save `strind'

*			0.3		Clean Daily Case Counts
*-------------------------------------------------------------------------------
import delimited "${raw}/0.NonSurveyData/daily_case_counts.csv", encoding(UTF-8) clear  // forward slashes keep the path portable across operating systems

* Replace the string date with a numeric daily date of the same name
gen date_td = date(date, "YMD")
drop date
ren date_td date 
format date %td
ren new_cases case_count
tab country

* Attach the daily stringency index; _mcase records the merge result
merge 1:1 country date using `strind', gen(_mcase)

* Normalise country names: lower case, all spaces removed
replace country = lower(country)
replace country = subinstr(country, " ", "", .)

* Rescale the index by 100
ren stringency_index strind
replace strind = strind/100

* Country-specific divisors
* NOTE(review): presumably population in millions, giving cases per million - confirm
replace case_count = case_count/50.9 if country == "colombia"
replace case_count = case_count/53.8 if country == "kenya"
replace case_count = case_count/29.1 if country == "nepal"
replace case_count = case_count/8.0 if country == "sierraleone"
replace case_count = case_count/13.0 if country == "rwanda"
replace case_count = case_count/206 if country == "nigeria"

* Case count relative to the country's own maximum
egen mcase = max(case_count), by(country)
gen rel_case = case_count/mcase

*tw (line strind date, sort lpattern("-")) (line rel_case date, sort ), by(country)
* Forward slashes in the path, consistent with the other paths in this file
save "${output}/clean/stringency_cases_ctrydate.dta", replace

* Add every country-date that occurs in the main data. Only pid is taken from
* the using file, and it is dropped again straight after the merge.
merge 1:m country date using `main', gen(_mdate) keepusing(pid) // get all necessary dates
drop pid
* Return to one row per country-date (the 1:m merge repeats master rows)
duplicates drop country date, force

* postdate is the date on rows that are not using-only (_mdate != 2) and have a
* strictly positive, non-missing index; it is missing on all other rows
gen postdate = date if _mdate != 2 & strind > 0 & !mi(strind)
* Missing values sort last, so postdate[1] is the earliest such date per country
bysort country (postdate): gen minposdate = postdate[1]
* Before that first date, missing index values and case counts are set to zero.
* When minposdate is itself missing, the comparison holds for every date.
replace strind = 0 if mi(strind) & (date < minposdate)
replace case_count = 0 if mi(case_count) &  date < minposdate

*keep if _mdate != 1
*gen mi_strind = mi(strind) + runiform()
*tw  (scatter mi_strind date if mi(strind), mcolor(%5)) (scatter mi_strind date if !mi(strind), mcolor(%5))
* Smooth the index over dates after 1 Sep 2019 with a very narrow bandwidth and
* use the smoothed series to fill the remaining gaps in strind.
* NOTE(review): confirm that lowess accepts by() together with gen(), and that
* _strind is non-missing on rows where strind is missing; if it is not, the
* replace on the next line changes nothing.
lowess strind date if date > mdy(9,1,2019), bw(.001) by(country) gen(_strind) nog
*tw (line _strind date, sort) (line strind date, sort), by(country)
replace strind = _strind if mi(strind)
* Dates before 1 Sep 2019 get an index of zero. 1 Sep 2019 itself is covered
* neither by this condition nor by the lowess sample above.
replace strind = 0 if date < mdy(9,1,2019)

* Declare a country-by-day panel; tsset needs a numeric panel identifier
encode country, gen(CTRY)
tsset CTRY date



* Trailing moving averages of the index and of case counts. Each window is the
* current day plus the preceding 6, 13 or 30 days, with no leads.
foreach v in strind case_count{
	tssmooth ma `v'_wk = `v' , window(6 1 0)
	tssmooth ma `v'_fnt = `v' , window(13 1 0)
	tssmooth ma `v'_month = `v' , window(30 1 0)
}

*			0.4		Merge
*-------------------------------------------------------------------------------

* Attach the country-date series to every matching row of the main data
merge 1:m country date using `main', nogenerate



*			0.5		Generate aggregated versions
*-------------------------------------------------------------------------------

* Period identifiers (week, fortnight, month), each paired with the earliest
* date observed in that period so all of them are expressed in daily-date units
generate wk = week(date) + 52*year(date)
bysort wk (date): generate wk_dy = date[1]
generate fnt = floor(date/14)
bysort fnt (date): generate fnt_dy = date[1]
generate mnth = month(date) + 12*year(date)
bysort mnth (date): generate month_dy = date[1]
/*
tw  (scatter wk_dy date, mcolor(%5)) (scatter fnt_dy date, mcolor(%1)) ///
	(scatter month_dy date, mcolor(%1)) (line date date) ///
		if date > mdy(3,1,2020) & date < mdy(9,1,2020)
*/

* Daily-level copies named with the _date and _dy suffixes
generate strind_date = strind
generate case_count_date = case_count
generate date_dy = date

*hist change, by(country)
* Copy ___depression_fw into ___depression
gen ___depression = ___depression_fw

* Largest and smallest change in the index per country. These are computed
* before missing changes are set to zero, so missing values are ignored here.
egen maxChange = max(change), by(country)
egen minChange = min(change), by(country)
replace change = 0 if mi(change)

* Within each group, sort by change: the last row carries the largest change and
* the first row the smallest. Take the start date of those rows and count days
* relative to it.
* NOTE(review): the grouping variable here is ctry while the egen calls above
* use country; ctry is not created in this file - confirm it comes from the
* main data and identifies the same groups.
* NOTE(review): rows tied on change are ordered arbitrarily, so start[_N] and
* start[1] may come from any of the tied rows - confirm this is acceptable.
bysort ctry (change): gen maxChangeDate = start[_N]
gen days_post_maxChange = date - maxChangeDate
bysort ctry (change): gen days_post_minChange = date - start[1]

*replace days_post_maxChange = . if abs(days_post_maxChange) > 365
*replace days_post_minChange = . if abs(days_post_minChange) > 365
*tw scatter strind days_post_maxChange, by(ctry)
*tw scatter strind days_post_minChange, by(ctry)

* Forward slashes in the path, consistent with the other paths in this file
save "${output}/clean/stringency_cases_clean.dta", replace




